Challenge 9

Author

Austin Liu

Data Import and Packages

library(tidyverse)
library(DT)

# Read the state-level baby-name records. The path is built with here::here()
# so the chunk does not depend on the current working directory.
names <- read_csv(here::here("supporting_artifacts", 
                             "learning_targets",  
                             "Lab 9", 
                             "StateNames_A.csv"))

# Preview only the first 1,000 rows. Passing the whole data set to a
# client-side DataTable embeds every row in the rendered HTML and triggers
# DT's "data is too big for client-side DataTables" warning.
datatable(head(names, 1000))
Warning in instance$preRenderHook(instance): It seems your data is too big
for client-side DataTables. You may consider server-side processing: https://
rstudio.github.io/DT/server.html

Part 1: Summarizing and Visualizing Allisons

Question 1

# The raw data stores this variable as `Gender`; add a `Sex` copy, which is
# the name the rest of the analysis uses.
names <- names |> 
  mutate(Sex = Gender)

# Total count of the name "Allison" per state, with one column per sex.
# `values_fill = 0` puts 0 only where a state/sex combination has no records.
# The previous `mutate(M = coalesce(NA, 0))` replaced *every* male total with
# 0, because `coalesce(NA, 0)` is the constant 0.
# NOTE(review): assumes at least one "M" and one "F" Allison record exists,
# so that both columns are created by the pivot.
allisonname <- names |> 
  filter(Name == "Allison") |> 
  group_by(State, Sex) |> 
  summarize(Count = sum(Count), .groups = "drop") |> 
  pivot_wider(names_from = Sex, values_from = Count, values_fill = 0) |> 
  select(all_of(c("State", "F", "M")))

knitr::kable(allisonname, 
             col.names = c("State", 
                           "Total Sum of Female", 
                           "Total Sum of Male"),
             format = "html")
State Total Sum of Female Total Sum of Male
AK 232 0
AL 1535 0
AR 1198 0
AZ 1880 0
CA 12413 0
CO 1594 0
CT 1099 0
DC 321 0
DE 294 0
FL 4455 0
GA 3257 0
HI 183 0
IA 1477 0
ID 451 0
IL 5110 0
IN 3067 0
KS 1283 0
KY 1905 0
LA 1209 0
MA 2218 0
MD 2229 0
ME 340 0
MI 4014 0
MN 2374 0
MO 2882 0
MS 817 0
MT 226 0
NC 3435 0
ND 285 0
NE 807 0
NH 412 0
NJ 3052 0
NM 399 0
NV 729 0
NY 5747 0
OH 5487 0
OK 1421 0
OR 1186 0
PA 4307 0
RI 306 0
SC 1228 0
SD 376 0
TN 2488 0
TX 10192 0
UT 1125 0
VA 3220 0
VT 135 0
WA 1956 0
WI 2367 0
WV 813 0
WY 142 0

Question 2

# Keep only the rows recorded as female ("F") for the name "Allison".
allisonname_F <- filter(names, Name == "Allison" & Sex == "F")

Question 3

# Collapse the female "Allison" rows to a single total per year.
allisonname_f_byYear <- summarize(
  group_by(allisonname_F, Year),
  Count = sum(Count)
)

# Bar chart of the yearly totals.
ggplot(allisonname_f_byYear, aes(x = Year, y = Count)) +
  geom_col() +
  labs(title = 'Popularity of the name "Allison" over time')

Part 2: Modeling the Number of Allisons

Question 4

# Simple linear regression of the yearly "Allison" total on the year.
Model1 <- lm(Count ~ Year, data = allisonname_f_byYear)

Question 5

# Scatterplot of the yearly counts with the least-squares line overlaid.
# The model is converted to a data frame explicitly with broom::augment()
# (which keeps the Count and Year columns) instead of handing the `lm` object
# to ggplot() and relying on its implicit fortify() conversion.
Model1 |> 
  broom::augment() |> 
  ggplot(mapping = aes(x = Year, y = Count)) +
  geom_point() +
  stat_smooth(method = "lm")
`geom_smooth()` using formula 'y ~ x'

Question 6

# Print the regression already stored in `Model1` rather than fitting the
# identical model a second time; the printed call and coefficients are the same.
Model1

Call:
lm(formula = Count ~ Year, data = allisonname_f_byYear)

Coefficients:
(Intercept)         Year  
   209689.8       -101.5  
# Estimated regression equation: predicted Count = 209689.8 - 101.5 * Year

Question 7

# Residuals plotted against fitted values for the linear model.
ggplot(data = broom::augment(Model1),
       mapping = aes(x = .fitted, y = .resid)) +
  geom_point()

In the plot of the residuals against the fitted values, we do not see any discernible pattern.

Question 8

Our model estimates that the number of babies named Allison falls by about 101.5 per year, so the name is declining in popularity. Even so, Allison is still quite popular: about 5,000 newborn babies were given that name in our most recent year of data.

Part 3: Spelling by State

Question 1

# Keep only the male records for the three spellings "Allan", "Alan", "Allen".
# No grouping is attached: nothing is summarised in this step, and a leftover
# `group_by(Year)` would silently carry over into every later use of
# `alan_name_M`.
alan_name_M <- names |> 
  filter(Sex == "M", Name %in% c("Allan", "Alan", "Allen"))

# geom_col() stacks the rows that share a Year, so each bar's height is the
# sum of Count over all rows for that year.
ggplot(data = alan_name_M, mapping = aes(x = Year, y = Count)) +
  geom_col() +
  labs(title = 'Popularity of the name "Allen, Allan, Alan" over time')

Question 2

# Year-2000 records for California and Pennsylvania, reshaped so that each
# spelling of the name has its own count column.
alannametotal <- alan_name_M |> 
  filter(Year == 2000 & State %in% c("PA", "CA")) |> 
  pivot_wider(names_from = Name, values_from = Count) |> 
  select(Year, Sex, State, Alan, Allen, Allan)

# Render the six selected columns as an HTML table with readable headers.
alannametotal |> 
  knitr::kable(format = "html",
               col.names = c("Year", "Sex", "State",
                             "Count of Alan", 
                             "Count of Allen", 
                             "Count of Allan"))
Year Sex State Count of Alan Count of Allen Count of Allan
2000 M CA 579 176 131
2000 M PA 51 56 12

Question 3

# Share of each spelling within a state for the year 2000: every count is
# divided by the total of the three spellings in the same state, so each
# state's row sums to 1.
alannamepercent <- alan_name_M |> 
  filter(Year == 2000, State %in% c("PA", "CA")) |> 
  group_by(State) |> 
  mutate(Count = Count / sum(Count)) |> 
  ungroup() |>  # drop the grouping once the per-state shares are computed
  pivot_wider(names_from = Name, values_from = Count) |> 
  select(Year, Sex, State, Alan, Allen, Allan)

# The values are proportions, so the headers say so instead of "Count of ...".
# `bootstrap_options` is the styling argument that applies to HTML tables;
# `latex_options` only affects LaTeX output.
knitr::kable(alannamepercent, 
             col.names = c("Year", "Sex", "State", 
                           "Proportion of Alan", 
                           "Proportion of Allen", 
                           "Proportion of Allan"), 
             format = "html") |> 
  kableExtra::kable_styling(bootstrap_options = "striped", font_size = 13) |> 
  # Style every row of the table rather than a hard-coded 1:2.
  kableExtra::row_spec(seq_len(nrow(alannamepercent)),
                       color = "white", background = "black")
Year Sex State Count of Alan Count of Allen Count of Allan
2000 M CA 0.6534989 0.1986456 0.1478555
2000 M PA 0.4285714 0.4705882 0.1008403